# Start capturing output to a log file
sink("replication_log.txt", append = FALSE, split = TRUE)
cat("Replication Log for 'External Threat and Nuclear Preferences: Micro-Level Insights from the Iran-Israel Confrontation'\n")
cat("Generated on:", format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\n\n")

# Clear workspace
rm(list = ls())

# Load required libraries
library(tidyverse)
library(tidytext)
library(wordcloud)
library(tm)
library(readxl)
library(xtable)
library(ggplot2)
library(tidyr)
library(dplyr)
library(haven)
library(MatchIt)
library(tableone)
library(cobalt)
library(reshape2)

cat("Libraries loaded successfully.\n")

# --- Text Analysis (survey_text.xlsx) ---
cat("\n=== Text Analysis ===\n")

# Load the Excel file
mydata_text <- read_excel("survey_text.xlsx")
cat("Loaded survey_text.xlsx\n")
print(head(mydata_text$nuclear_content))

# Remove rows where nuclear_content is NA
mydata_text <- mydata_text %>% filter(!is.na(nuclear_content))
cat("Dimensions after removing NA in nuclear_content:", dim(mydata_text), "\n")


# --- Thematic Analysis ---
cat("\n=== Thematic Analysis ===\n")

# Filter out rows where all themes are 0
mydata_text <- mydata_text %>%
  filter(!(theme_deterrence == 0 & theme_prestige == 0 & theme_progress == 0 & theme_rights == 0))
cat("Dimensions after filtering zero themes:", dim(mydata_text), "\n")

# Count the frequency of each theme
theme_counts <- mydata_text %>%
  summarise(
    Deterrence = sum(theme_deterrence),
    Prestige = sum(theme_prestige),
    Progress = sum(theme_progress),
    Rights = sum(theme_rights)
  )
cat("Theme counts:\n")
print(theme_counts)

# Convert to proportions
theme_proportions <- theme_counts / nrow(mydata_text)
cat("Theme proportions:\n")
print(theme_proportions)

# Count co-occurrence of each pair of themes
cooccurrence <- mydata_text %>%
  summarise(
    Deterrence_Prestige = sum(theme_deterrence & theme_prestige),
    Deterrence_Progress = sum(theme_deterrence & theme_progress),
    Deterrence_Rights = sum(theme_deterrence & theme_rights),
    Prestige_Progress = sum(theme_prestige & theme_progress),
    Prestige_Rights = sum(theme_prestige & theme_rights),
    Progress_Rights = sum(theme_progress & theme_rights)
  )
cat("Theme co-occurrence:\n")
print(cooccurrence)

# --- Pre and Post Samples ---
cat("\n=== Pre and Post Samples ===\n")

# Create Period variable
mydata_text$StartDate <- as.numeric(mydata_text$StartDate)
cat("StartDate summary:\n")
print(summary(mydata_text$StartDate))

mydata_text$StartDate <- as.Date(mydata_text$StartDate, origin = "1899-12-30")
mydata_text$period <- ifelse(mydata_text$StartDate < as.Date("2024-04-02"), "Before", "After")
mydata_text <- mydata_text[format(mydata_text$StartDate, "%m-%d") != "02-18", ]
cat("Period distribution:\n")
print(table(mydata_text$period))

# Filter into pre- and post-confrontation subsets
pre_confrontation <- mydata_text %>% filter(period == "Before")
post_confrontation <- mydata_text %>% filter(period == "After")
cat("Number of responses by period:\n")
print(table(mydata_text$period))

# Calculate theme percentages by period
pre_theme_distribution <- pre_confrontation %>%
  summarise(
    Deterrence = mean(theme_deterrence, na.rm = TRUE),
    Prestige = mean(theme_prestige, na.rm = TRUE),
    Progress = mean(theme_progress, na.rm = TRUE),
    Rights = mean(theme_rights, na.rm = TRUE)
  )
post_theme_distribution <- post_confrontation %>%
  summarise(
    Deterrence = mean(theme_deterrence, na.rm = TRUE),
    Prestige = mean(theme_prestige, na.rm = TRUE),
    Progress = mean(theme_progress, na.rm = TRUE),
    Rights = mean(theme_rights, na.rm = TRUE)
  )
theme_comparison <- rbind(Pre_Confrontation = pre_theme_distribution, Post_Confrontation = post_theme_distribution)
cat("Theme comparison (pre vs. post):\n")
print(theme_comparison)

# --- Survey Analysis (survey.xlsx) ---
cat("\n=== Survey Analysis ===\n")

# Load the Excel file
mydata <- read_excel("survey.xlsx")
cat("Loaded survey.xlsx\n")
mydata <- mydata[-1, ] # Drop first observation
cat("Dropped first observation. New dimensions:", dim(mydata), "\n")

# Create Period variable
mydata$StartDate <- as.numeric(mydata$StartDate)
cat("StartDate summary:\n")
print(summary(mydata$StartDate))
mydata$StartDate <- as.Date(mydata$StartDate, origin = "1899-12-30")
mydata$period <- ifelse(mydata$StartDate < as.Date("2024-04-02"), "Before", "After")
mydata <- mydata[format(mydata$StartDate, "%m-%d") != "02-18", ]
cat("Period distribution:\n")
print(table(mydata$period))

# Rename and process dependent variable
mydata <- mydata %>% rename(nuclear = `mil-tech_2`)
mydata$nuclear <- as.numeric(mydata$nuclear)
cat("Nuclear variable distribution:\n")
print(table(mydata$nuclear))
cat("Nuclear variable structure:\n")
print(str(mydata$nuclear))

# Create binary nuclear variable
mydata$nuclear_binary <- ifelse(mydata$nuclear == 1, 1, 0)
cat("Nuclear binary variable distribution:\n")
print(table(mydata$nuclear_binary))

# Calculate proportions by period
filtered_data <- mydata %>% filter(!is.na(nuclear) & !is.na(period))
descriptive_stats <- filtered_data %>%
  group_by(period, nuclear) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(period) %>%
  mutate(
    proportion = count / sum(count) * 100,
    label = paste0(round(proportion, 1), "%")
  )
descriptive_stats$period <- factor(descriptive_stats$period, levels = c("Before", "After"))
cat("Nuclear support by period:\n")
print(descriptive_stats)

# --- Regression Analysis ---
cat("\n=== Regression Analysis ===\n")

# Code period: After = 1, Before = 0
mydata_text$period <- ifelse(mydata_text$period == "After", 1, 0)

# Logistic regression models for each theme
deterrence_model <- glm(theme_deterrence ~ period, family = "binomial", data = mydata_text)
prestige_model <- glm(theme_prestige ~ period, family = "binomial", data = mydata_text)
progress_model <- glm(theme_progress ~ period, family = "binomial", data = mydata_text)
rights_model <- glm(theme_rights ~ period, family = "binomial", data = mydata_text)

# Function to extract odds ratios
extract_odds <- function(model, theme) {
  data.frame(
    Theme = theme,
    Odds_Ratio = exp(coef(model)[2]),
    CI_Lower = exp(confint(model)[2, 1]),
    CI_Upper = exp(confint(model)[2, 2]),
    p_value = summary(model)$coefficients[2, 4]
  )
}

# Combine odds ratios
odds_ratios <- rbind(
  extract_odds(deterrence_model, "Deterrence"),
  extract_odds(prestige_model, "Prestige"),
  extract_odds(progress_model, "Progress"),
  extract_odds(rights_model, "Rights")
)
cat("Odds ratios for theme models:\n")
print(odds_ratios)

# Recode variables
mydata$period_binary <- ifelse(mydata$StartDate >= as.Date("2024-04-02"), 1, 0)
cat("Period binary distribution:\n")
print(table(mydata$period_binary))

names(mydata)[names(mydata) == "patriotism"] <- "national_identity"
names(mydata)[names(mydata) == "religion-important"] <- "religiosity"
names(mydata)[names(mydata) == "employment-state"] <- "employment"
names(mydata)[names(mydata) == "family-military"] <- "fam_military"
names(mydata)[names(mydata) == "war-fam-expr"] <- "veteran_family"

mydata$national_identity <- ifelse(mydata$national_identity == 1, 1, 0)
mydata$religiosity <- ifelse(mydata$religiosity == 1, 1, 0)
mydata$conservative <- ifelse(mydata$vote96 %in% c(2, 3), 1, 0)
mydata$reformist <- ifelse(mydata$vote96 %in% c(1, 4), 1, 0)
mydata$college_education <- ifelse(mydata$education %in% c(3, 4, 5, 6), 1, 0)
mydata$no_college_education <- ifelse(mydata$education %in% c(1, 2, 7), 1, 0)
mydata$below30 <- ifelse(mydata$age %in% c(1, 2, 3), 1, 0)
mydata$above30 <- ifelse(mydata$age %in% c(4:13), 1, 0)
mydata <- mydata[mydata$gender != 3, ]
mydata$gender <- ifelse(mydata$gender == 1, 1, 0)
mydata$male <- ifelse(mydata$gender == 1, 1, 0)
mydata$female <- ifelse(mydata$gender == 0, 1, 0)
mydata$low_income <- ifelse(mydata$income %in% c(1, 2), 1, 0)
mydata$high_income <- ifelse(mydata$income %in% c(4, 5, 6, 7), 1, 0)
mydata$middle_income <- ifelse(mydata$income == 3, 1, 0)
mydata$fam_military <- ifelse(mydata$fam_military != "4", 1, 0)
mydata <- mydata[mydata$veteran_family != 3, ]
mydata$veteran_family <- ifelse(mydata$veteran_family == 1, 1, 0)
mydata$education_recode <- ifelse(mydata$education == 7, 2, mydata$education)

cat("Recoded variable distributions:\n")
print(table(mydata$national_identity))
print(table(mydata$religiosity))
print(table(mydata$conservative))
print(table(mydata$reformist))
print(table(mydata$college_education))
print(table(mydata$no_college_education))
print(table(mydata$below30))
print(table(mydata$above30))
print(table(mydata$gender))
print(table(mydata$low_income))
print(table(mydata$veteran_family))

# Regression models
bivariate_model <- glm(nuclear_binary ~ period_binary, family = "binomial", data = mydata)
cat("Bivariate model summary:\n")
print(summary(bivariate_model))

model_demographics <- glm(nuclear_binary ~ period_binary + gender + below30 + college_education + low_income,
                          family = "binomial", data = mydata)
cat("Demographics model summary:\n")
print(summary(model_demographics))

model_full <- glm(nuclear_binary ~ period_binary + gender + below30 + college_education + low_income +
                    religiosity + national_identity + conservative + reformist + fam_military + veteran_family,
                  family = "binomial", data = mydata)
cat("Full model summary:\n")
print(summary(model_full))

cat("Odds ratios for regression models:\n")
print(exp(coef(bivariate_model)))
print(exp(coef(model_demographics)))
print(exp(coef(model_full)))

# --- Plotting Figures in Order: Figure 1, Figure 2, Figure 3, Figure 4 ---

# Figure 1: Comparison of Support Levels
ggplot(descriptive_stats, aes(x = period, y = proportion, fill = factor(nuclear))) +
  geom_bar(stat = "identity", position = "stack", width = 0.8) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3.5, color = "white") +
  scale_fill_manual(values = c("gray5", "gray30", "gray50", "gray70"),
                    name = "Support Level",
                    labels = c("Completely Agree", "Somewhat Agree", "Somewhat Disagree", "Completely Disagree")) +
  labs(title = "", x = "Period", y = "Percentage") +
  theme_minimal()
ggsave("figures/figure_1_nuclear_by_period_barplot.png")
cat("Saved Figure 1: Comparison of Support Levels to figures/figure_1_nuclear_by_period_barplot.png\n")

# Figure 2: Odds Ratios for Support of Nuclear Weapons Acquisition
extract_odds_ratios <- function(model, model_name) {
  tidy_model <- tidy(model, exponentiate = TRUE, conf.int = TRUE, conf.level = 0.90)
  tidy_model$model <- model_name
  return(tidy_model)
}
or_bivariate <- extract_odds_ratios(bivariate_model, "Model 1: Bivariate")
or_demographics <- extract_odds_ratios(model_demographics, "Model 2: Demographics")
or_full <- extract_odds_ratios(model_full, "Model 3: Full Model")
odds_ratios_combined <- rbind(or_bivariate, or_demographics, or_full) %>%
  filter(term != "(Intercept)")
variable_labels <- c(
  "period_binary" = "Post-Confrontation",
  "gender" = "Gender (Male)",
  "below30" = "Age Below 30",
  "college_education" = "College Education",
  "low_income" = "Low Income",
  "religiosity" = "Religiosity",
  "national_identity" = "National Identity",
  "conservative" = "Conservative",
  "reformist" = "Reformist",
  "fam_military" = "Military Family",
  "veteran_family" = "Veteran Family"
)
odds_ratios_combined$term_clean <- variable_labels[as.character(odds_ratios_combined$term)]
odds_ratios_combined$term_clean[is.na(odds_ratios_combined$term_clean)] <- odds_ratios_combined$term
term_order <- c(
  "Post-Confrontation", "Gender (Male)", "Age Below 30", "College Education", "Low Income",
  "Religiosity", "National Identity", "Conservative", "Reformist", "Military Family", "Veteran Family"
)
odds_ratios_combined$term_clean <- factor(odds_ratios_combined$term_clean, levels = rev(term_order))
ggplot(odds_ratios_combined, aes(x = term_clean, y = estimate, shape = model, group = model, fill = model)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.7), color = "black") +
  scale_shape_manual(values = c(21, 22, 23)) +
  scale_fill_manual(values = c("gray20", "gray50", "gray80")) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "gray40") +
  labs(title = "", x = "Predictor Variable", y = "Odds Ratio", shape = "Model", fill = "Model") +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 10, face = "plain"), axis.text.x = element_text(size = 10), axis.title.y = element_blank()) +
  coord_flip()
ggsave("figures/figure_2_odds_ratios_models.png")
cat("Saved Figure 2: Odds Ratios for Support of Nuclear Weapons Acquisition to figures/figure_2_odds_ratios_models.png\n")




# Figure 3: Thematic Proportions Before and After the Confrontation
theme_proportions <- tibble::tibble(
  Period = c("Before", "After"),
  Deterrence = c(0.719, 0.788),
  Prestige = c(0.230, 0.235),
  Progress = c(0.163, 0.156),
  Rights = c(0.157, 0.142)
)
theme_proportions_long <- theme_proportions %>%
  pivot_longer(cols = -Period, names_to = "Theme", values_to = "Proportion") %>%
  mutate(Period = factor(Period, levels = c("Before", "After")))
ggplot(theme_proportions_long, aes(x = Theme, y = Proportion, fill = Period)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8), color = "black", width = 0.7) +
  geom_text(aes(label = scales::percent(Proportion, accuracy = 1)), 
            position = position_dodge(width = 0.8), vjust = -0.3, size = 3.5) +
  scale_fill_grey(start = 0.8, end = 0.4) +
  labs(title = "", x = "", y = "Proportion", fill = "") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "top")
ggsave("figures/figure_3_theme_proportions_barplot.png")
cat("Saved Figure 3: Thematic Proportions Before and After the Confrontation to figures/figure_3_theme_proportions_barplot.png\n")

# Figure 4: Odds Ratio: Comparing Motivations Before and After the Confrontation
odds_ratios$Theme <- factor(odds_ratios$Theme, levels = c("Rights", "Progress", "Prestige", "Deterrence"))
ggplot(odds_ratios, aes(x = Theme, y = Odds_Ratio)) +
  geom_pointrange(aes(ymin = CI_Lower, ymax = CI_Upper), color = "gray30", size = 1.2, fatten = 2) +
  geom_point(color = "black", size = 3) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "gray50", size = 1) +
  labs(title = "", subtitle = "", x = "Theme", y = "Odds Ratio") +
  theme_minimal(base_size = 12) +
  coord_flip() +
  theme(
    plot.title = element_text(face = "plain", size = 16, color = "black"),
    plot.subtitle = element_text(size = 12, color = "gray30"),
    axis.text.y = element_text(size = 12, face = "plain", color = "black"),
    axis.text.x = element_text(size = 10, color = "black"),
    panel.grid.major.x = element_line(color = "gray90"),
    legend.position = "none"
  )
ggsave("figures/figure_4_odds_ratios_themes.png")
cat("Saved Figure 4: Odds Ratio: Comparing Motivations Before and After the Confrontation to figures/figure_4_odds_ratios_themes.png\n")





# End of analysis
cat("\nAnalysis completed.\n")
sink()